msg_tool\scripts\artemis\ast/
parser.rs1use super::types::*;
2use crate::types::*;
3use crate::utils::encoding::*;
4use crate::utils::escape::unescape_lua_str;
5use anyhow::Result;
6
7pub struct Parser<'a> {
9 str: &'a [u8],
10 pos: usize,
11 len: usize,
12 line: usize,
13 line_index: usize,
14 encoding: Encoding,
15}
16
17impl<'a> Parser<'a> {
18 pub fn new<S: AsRef<[u8]> + ?Sized>(str: &'a S, encoding: Encoding) -> Self {
23 let str = str.as_ref();
24 Parser {
25 str,
26 pos: 0,
27 len: str.len(),
28 line: 1,
29 line_index: 1,
30 encoding,
31 }
32 }
33
34 pub fn try_parse_header(mut self) -> Result<()> {
36 self.erase_whitespace();
37 if self.is_indent(b"astver") {
38 self.parse_indent(b"astver")?;
39 self.parse_equal()?;
40 self.parse_f64()?;
41 } else if self.is_indent(b"astname") {
42 self.parse_indent(b"astname")?;
43 self.parse_equal()?;
44 } else if self.is_indent(b"ast") {
45 self.parse_indent(b"ast")?;
46 self.parse_equal()?;
47 } else {
48 return self.error("expected 'astver', 'astname' or 'ast'");
49 }
50 Ok(())
51 }
52
53 pub fn parse(mut self) -> Result<AstFile> {
55 self.erase_whitespace();
56 let astver = if self.is_indent(b"astver") {
57 self.parse_indent(b"astver")?;
58 self.parse_equal()?;
59 Some(self.parse_f64()?)
60 } else {
61 None
62 };
63 self.erase_whitespace();
64 let mut astname = None;
65 if self.is_indent(b"astname") {
66 self.parse_indent(b"astname")?;
67 self.parse_equal()?;
68 astname = Some(self.parse_any_str()?.to_string());
69 self.erase_whitespace();
70 }
71 self.parse_indent(b"ast")?;
72 self.parse_equal()?;
73 let ast = self.parse_value()?;
74 Ok(AstFile {
75 astver,
76 astname,
77 ast,
78 })
79 }
80
81 fn parse_equal(&mut self) -> Result<()> {
82 self.erase_whitespace();
83 match self.next() {
84 Some(b'=') => Ok(()),
85 _ => self.error("expected '='"),
86 }
87 }
88
89 fn parse_value(&mut self) -> Result<Value> {
90 self.erase_whitespace();
91 match self.peek() {
92 Some(t) => match t {
93 b'"' => return self.parse_str().map(|x| Value::Str(x.to_string())),
94 b'[' => {
95 self.eat_char();
96 match self.peek().ok_or(self.error2("unexpected eof"))? {
97 b'[' => {
98 self.pos -= 1; self.parse_raw_str().map(|x| Value::Str(x))
100 }
101 _ => {
102 self.pos -= 1;
103 self.parse_key_val()
104 }
105 }
106 }
107 b'-' | b'.' | b'0'..=b'9' => return self.parse_any_number(),
108 b'n' => {
109 if self.is_indent(b"nil") {
110 self.pos += 3; Ok(Value::Null)
112 } else {
113 self.parse_key_val()
114 }
115 }
116 b'_' | b'a'..=b'z' | b'A'..=b'Z' | b']' => return self.parse_key_val(),
117 b'{' => return self.parse_array(),
118 _ => return self.error(format!("unexpected token: {}", t)),
119 },
120 None => return self.error("unexpected eof"),
121 }
122 }
123
124 fn parse_array(&mut self) -> Result<Value> {
125 self.erase_whitespace();
126 self.parse_indent(b"{")?;
127 let mut array = Vec::new();
128 loop {
129 self.erase_whitespace();
130 match self.peek() {
131 Some(b'}') => {
132 self.eat_char();
133 break;
134 }
135 Some(_) => {
136 let val = self.parse_value()?;
137 array.push(val);
138 match self.peek() {
139 Some(b',') => {
140 self.eat_char();
141 }
142 _ => {}
143 }
144 }
145 None => return self.error("unexpected eof"),
146 }
147 }
148 Ok(Value::Array(array))
149 }
150
151 fn parse_any_number(&mut self) -> Result<Value> {
152 self.erase_whitespace();
153 let start = self.pos;
154 while let Some(c) = self.peek() {
155 if c == b'.' || c == b'-' || c.is_ascii_digit() {
156 self.eat_char();
157 } else {
158 break;
159 }
160 }
161 let s = std::str::from_utf8(&self.str[start..self.pos])?;
162 if s.contains('.') {
163 s.parse()
164 .map(Value::Float)
165 .map_err(|e| self.error2(format!("failed to parse f64: {}", e)))
166 } else {
167 s.parse()
168 .map(Value::Int)
169 .map_err(|e| self.error2(format!("failed to parse i64: {}", e)))
170 }
171 }
172
173 fn parse_any_str(&mut self) -> Result<String> {
174 self.erase_whitespace();
175 match self.peek().ok_or(self.error2("unexpected eof"))? {
176 b'"' => self.parse_str(),
177 b'[' => self.parse_raw_str(),
178 _ => self.error("expected string or raw string"),
179 }
180 }
181
182 fn parse_f64(&mut self) -> Result<f64> {
183 self.erase_whitespace();
184 let start = self.pos;
185 while let Some(c) = self.peek() {
186 if c == b'.' || c == b'-' || c.is_ascii_digit() {
187 self.eat_char();
188 } else {
189 break;
190 }
191 }
192 let s = std::str::from_utf8(&self.str[start..self.pos])?;
193 s.parse()
194 .map_err(|e| self.error2(format!("failed to parse f64: {}", e)))
195 }
196
197 fn parse_str(&mut self) -> Result<String> {
198 self.erase_whitespace();
199 self.parse_indent(b"\"")?;
200 let start = self.pos;
201 let mut pc = None;
202 let end = loop {
203 match self.next() {
204 Some(c) => {
205 if c == b'"' {
206 if pc.is_none_or(|x| x != b'\\') {
207 break self.pos - 1;
208 }
209 }
210 pc = Some(c);
211 }
212 None => return self.error("unexpected eof"),
213 }
214 };
215 Ok(unescape_lua_str(
216 &decode_to_string(self.encoding, &self.str[start..end], true)
217 .map_err(|e| self.error2(e))?,
218 ))
219 }
220
221 fn parse_raw_str(&mut self) -> Result<String> {
222 self.erase_whitespace();
223 self.parse_indent(b"[[")?;
224 let start = self.pos;
225 let mut pc = None;
226 let end = loop {
227 match self.next() {
228 Some(c) => {
229 if c == b']' {
230 if pc.is_some_and(|x| x == b']') {
231 break self.pos - 2;
232 }
233 }
234 pc = Some(c);
235 }
236 None => return self.error("unexpected eof"),
237 }
238 };
239 decode_to_string(self.encoding, &self.str[start..end], true).map_err(|e| self.error2(e))
240 }
241
242 fn erase_whitespace(&mut self) {
243 while let Some(c) = self.peek() {
244 if c == b' ' || c == b'\t' || c == b'\n' || c == b'\r' {
245 if c == b'\n' {
246 self.line += 1;
247 self.line_index = 1;
248 } else {
249 self.line_index += 1;
250 }
251 self.eat_char();
252 } else {
253 break;
254 }
255 }
256 }
257
258 fn next(&mut self) -> Option<u8> {
259 if self.pos < self.len {
260 let c = self.str[self.pos];
261 self.pos += 1;
262 if c == b'\n' {
263 self.line += 1;
264 self.line_index = 1;
265 } else {
266 self.line_index += 1;
267 }
268 Some(c)
269 } else {
270 None
271 }
272 }
273
274 fn peek(&self) -> Option<u8> {
275 if self.pos < self.len {
276 Some(self.str[self.pos])
277 } else {
278 None
279 }
280 }
281
282 fn parse_key_val(&mut self) -> Result<Value> {
283 let key = self.get_indent()?;
284 self.parse_equal()?;
285 let val = self.parse_value()?;
286 Ok(Value::KeyVal((Box::new(key), Box::new(val))))
287 }
288
289 fn get_indent(&mut self) -> Result<Value> {
290 self.erase_whitespace();
291 let start = self.pos;
292 let mut is_first = true;
293 let end = loop {
294 match self.peek() {
295 Some(t) => match t {
296 b'_' | b'a'..=b'z' | b'A'..=b'Z' | b'"' => self.eat_char(),
297 b'[' => {
298 self.eat_char();
299 let v = self.parse_value()?;
300 let n = self.next().ok_or(self.error2("unexpected eof"))?;
301 if n != b']' {
302 return self.error("expected ']' after key");
303 }
304 return Ok(v);
305 }
306 b'0'..=b'9' => {
307 if is_first {
308 return self.error("unexpected digit");
309 }
310 self.eat_char();
311 }
312 b' ' | b'\t' | b'=' | b'\n' | b'\r' => break self.pos,
313 _ => return self.error("unexpected token"),
314 },
315 None => return self.error("unexpected eof"),
316 }
317 is_first = false;
318 };
319 let mut data = &self.str[start..end];
320 if data.starts_with(b"[\"") && data.ends_with(b"\"]") {
321 data = &data[2..data.len() - 2];
322 }
323 Ok(Value::Str(
324 decode_to_string(self.encoding, data, true).map_err(|e| self.error2(e))?,
325 ))
326 }
327
328 fn is_indent(&self, indent: &[u8]) -> bool {
329 if self.pos + indent.len() > self.len {
330 return false;
331 }
332 for (i, c) in indent.iter().enumerate() {
333 if self.str[self.pos + i] != *c {
334 return false;
335 }
336 }
337 true
338 }
339
340 fn parse_indent(&mut self, indent: &[u8]) -> Result<()> {
341 for c in indent {
342 match self.next() {
343 Some(x) => {
344 if x != *c {
345 return self.error("unexpected indent");
346 }
347 }
348 None => return self.error("unexpected eof"),
349 }
350 }
351 Ok(())
352 }
353
354 fn eat_char(&mut self) {
355 if self.pos < self.len {
356 self.pos += 1;
357 }
358 }
359
360 fn error2<T>(&self, msg: T) -> anyhow::Error
361 where
362 T: std::fmt::Display,
363 {
364 anyhow::Error::msg(format!(
365 "Failed to parse at position line {} column {} (byte {}): {}",
366 self.line, self.line_index, self.pos, msg
367 ))
368 }
369
370 fn error<T, A>(&self, msg: T) -> Result<A>
371 where
372 T: std::fmt::Display,
373 {
374 Err(anyhow::Error::msg(format!(
375 "Failed to parse at position line {} column {} (byte {}): {}",
376 self.line, self.line_index, self.pos, msg
377 )))
378 }
379}